library(rvest)
library(stringr)
library(plyr)
library(googleway)
好久沒上傳更新了,這段期間其實都有在充實AI的知識,也進行了一些實作,deep learning 真的是 deep stack,做中學的體會特別深刻,之後再來補上心得。過年期間發生了華航機師聯合罷工,約2.5萬名旅客受到影響;剛好看到Google Maps的應用,心血來潮,就趁這個機會練習一下,也給自己一些更新網站的動力。這次視覺化的實作很簡單,主要分為四個步驟:
首先,利用rvest套件的read_html函式擷取華航罷工的新聞資訊,其中以CI開頭的是受影響的航班,後面的三個字母是機場代碼(IATA code),共取得140筆航班資訊
# Pilot union strike: scrape the affected-flight list from the China Airlines
# press release and build a data frame of (ID, Flight, Airport_S, Airport_E).
news <- read_html("https://www.china-airlines.com/au/en/discover/news/press-release/announcement")
news_text <- news %>%
  html_nodes("td") %>%
  html_nodes("p") %>%
  html_text(trim = TRUE)
# Keep only entries that begin with the "CI" flight prefix
news_text <- str_subset(news_text, "^CI")
# Cut each entry 4 characters after its first "-", then remove the " -"
# separator so that only "<flight> <origin> <destination>" remains.
dash_pos <- str_locate(news_text, "-")[, "start"]
news_text <- str_sub(news_text, start = 1L, end = dash_pos + 4L)
news_text <- gsub(" -", "", news_text)
# Split into fields and keep only well-formed three-field entries; entries
# without a "-" (NA after the cut) or with extra fields would otherwise be
# silently recycled by rbind() into wrong rows.
news_fields <- strsplit(news_text, " ")
is_route <- lengths(news_fields) == 3L
if (!all(is_route)) {
  warning(sum(!is_route), " entries could not be parsed as 'flight origin destination' and were dropped",
          call. = FALSE)
}
news_fields <- news_fields[is_route]
if (length(news_fields) == 0L) {
  stop("No affected flights could be parsed from the press release", call. = FALSE)
}
# Add ID, convert to data frame
news_df <- do.call(rbind, news_fields)
news_df <- cbind(ID = seq_len(nrow(news_df)), as.data.frame(news_df, stringsAsFactors = FALSE))
colnames(news_df) <- c("ID", "Flight", "Airport_S", "Airport_E")
news_df 前3筆資料
## ID Flight Airport_S Airport_E
## 1 1 CI053/054 BNE AKL
## 2 2 CI058 MEL TPE
## 3 3 CI011 JFK TPE
接下來,透過 www.world-airport-codes.com 網站,使用IATA code查詢機場的經緯度
# Look up airport coordinates by IATA code on www.world-airport-codes.com.
#
# iata_codes: vector of IATA airport codes; one search page is requested per
#   element.
# Returns a list, in input order, holding one named character vector
#   c(iata, lat, lon) per code. lat / lon are NA when the page carries no
#   matching attribute; when several match, the first one is used.
IATA2LatLon <- function(iata_codes) {
  # First value whose key equals `key`, or NA when there is none, so every
  # result has exactly three elements and can be row-bound safely afterwards.
  first_value <- function(keys, values, key) {
    hit <- which(keys == key)
    if (length(hit) == 0L) {
      return(NA_character_)
    }
    values[[hit[1L]]]
  }

  lapply(iata_codes, function(x) {
    cat('iata_code =', x , '\n')
    IATA <- read_html(paste0('https://www.world-airport-codes.com/search/?s=', x))
    # Attribute nodes of the airport page
    IATA_node <- html_nodes(IATA, "span.airportAttributeValue")
    # data-key names the attribute, data-value holds its content
    IATA_data_key <- html_attr(IATA_node, 'data-key')
    IATA_data_value <- html_attr(IATA_node, 'data-value')
    # Latitude and longitude
    lat <- first_value(IATA_data_key, IATA_data_value, 'Latitude')
    lon <- first_value(IATA_data_key, IATA_data_value, 'Longitude')
    c(iata = x, lat = lat, lon = lon)
  })
}
# Fetch coordinates once per distinct airport that appears as origin or destination
unique_iata <- unique(c(news_df$Airport_S, news_df$Airport_E))
latlon_iata <- data.frame(
  do.call(rbind, IATA2LatLon(unique_iata)),
  stringsAsFactors = FALSE
)
# The lookup yields character values; convert both coordinate columns to numeric
latlon_iata[c("lat", "lon")] <- lapply(latlon_iata[c("lat", "lon")], as.numeric)
latlon_iata 前3筆資料
## iata lat lon
## 1 BNE -27.3842 153.1170
## 2 MEL -37.6733 144.8430
## 3 JFK 40.6398 -73.7789
將news_df, latlon_iata合併
# Attach the origin airport's coordinates to every flight
flights_iata <- merge(news_df, latlon_iata, by.x = "Airport_S", by.y = "iata")
names(flights_iata)[match(c("lat", "lon"), names(flights_iata))] <- c("Lat_S", "Lon_S")
# Attach the destination airport's coordinates
flights_iata <- merge(flights_iata, latlon_iata, by.x = "Airport_E", by.y = "iata")
names(flights_iata)[match(c("lat", "lon"), names(flights_iata))] <- c("Lat_E", "Lon_E")
利用googleway套件中encode_pl方法,將經緯度編碼成polyline
# Encode each flight's origin -> destination route as a polyline, one per ID
polylines_lst <- lapply(unique(flights_iata$ID), function(flight_id) {
  route <- flights_iata[flights_iata$ID == flight_id, ]
  data.frame(
    id = flight_id,
    polyline = encode_pl(
      lat = c(route$Lat_S, route$Lat_E),
      lon = c(route$Lon_S, route$Lon_E)
    ),
    stringsAsFactors = FALSE
  )
})
# Join the polylines back onto the flight table by flight ID
flights_iata <- merge(flights_iata, do.call(rbind, polylines_lst), by.x = "ID", by.y = "id")
flights_iata 前3筆資料,到這裡資料已經準備完成
## ID Airport_E Airport_S Flight Lat_S Lon_S Lat_E Lon_E
## 1 1 AKL BNE CI053/054 -27.3842 153.1170 -37.0081 174.792
## 2 2 TPE MEL CI058 -37.6733 144.8430 25.0777 121.233
## 3 3 TPE JFK CI011 40.6398 -73.7789 25.0777 121.233
## polyline
## 1 dncfDgtpe\\jtvy@wkhcC
## 2 ba}dFws`sZu`_~JniboC
## 3 wm`wFb}haMdn~}Akgged@
地圖風格設定,有Standard, Silver, Retro, Dark, Night, Aubergine等主題可選。最後輸入API key,繪製google map
# Map theme
style <- map_styles()[["aubergine"]]
# Google Maps API key
map_key <- "your_api_key"
# Draw every affected route as a polyline on the styled base map
add_polylines(
  google_map(key = map_key, style = style),
  data = flights_iata,
  polyline = "polyline",
  mouse_over_group = "Airport_S",
  stroke_weight = 1.5,
  stroke_opacity = 0.5,
  stroke_colour = "#FF99FF"
)
華航罷工影響航班的飛行路線分佈
往返TPE影響最多,日本路線影響頻繁
小港往返大阪、上海、香港及馬尼拉受到影響
台南往返大阪受影響